This script visualizes the full dataset and highlights the sub-populations in the gland cells.
library(scran)
library(scater)
library(DropletUtils)
library(openxlsx)
library(Rtsne)
library(pheatmap)
library(viridis)
library(sceasy) #devtools::install_github("cellgeni/sceasy")
library(reticulate)
# library(umap)
# Load the project's auxiliary functions (the sourced file is not part of this document).
source("~/Dropbox/Postdoc/git/BEOrigin/Analysis/Functions/auxiliary.R")
# Seed the random number generator; note that each section below re-seeds
# with its own value before shuffling the cells.
set.seed(123456)
Fix the annotation of all data and add annotation for cell type.
# Load the object holding the normalized and batch-corrected data
sce.all <- readRDS(file = "~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/old/alldata.rds")

# Tissue colour palette: a vector named after the tissues present in the data,
# filled in one go with one colour per tissue code.
annot.col <- vector(length = 8)
names(annot.col) <- unique(colData(sce.all)$Tissue)
annot.col[c("NSCJ", "BSCJ", "NE", "NG", "BE", "ND", "SMG", "GM")] <- c(
  "#F39B7F", "#DC0000", "#8B0000", "#4DBBD5",
  "#00A087", "#3C5488", "#B09C85", "#BFE7E1"
)

# Quick look at the stored tSNE embedding, coloured by tissue
tsne.df <- data.frame(
  tsne1 = reducedDims(sce.all)$TSNE[, 1],
  tsne2 = reducedDims(sce.all)$TSNE[, 2],
  tissue = colData(sce.all)$Tissue
)
tsne.corrected <- ggplot(tsne.df) +
  geom_point(aes(tsne1, tsne2, colour = tissue)) +
  scale_color_manual(values = annot.col) +
  theme_minimal()
tsne.corrected
# Shorten the sample names to the "Patient..._..._..." part captured by the regex
sample.pattern <- ".+(Patient.+?_.+?_.+?)_.+"
colData(sce.all)$Sample <- sub(sample.pattern, "\\1", colData(sce.all)$Sample, perl = TRUE)

# Bring in the cell-type annotation from the filtered (annotated) object
sce.good <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/All_corrected_sce_filtered.rds")
colData(sce.good)$Sample <- sub(sample.pattern, "\\1", colData(sce.good)$Sample, perl = TRUE)

# Cells are identified by Sample + Barcode. The keys are built once (instead of
# once per annotation column) and use a separator so that two different
# Sample/Barcode pairs cannot paste to the same string.
key.all <- paste(colData(sce.all)$Sample, colData(sce.all)$Barcode, sep = "|")
key.good <- paste(colData(sce.good)$Sample, colData(sce.good)$Barcode, sep = "|")
hit <- match(key.good, key.all)

# An annotated cell that is absent from sce.all yields an NA index, which would
# abort the subscripted assignment; skip such cells and report them instead.
found <- !is.na(hit)
if (!all(found)) {
  warning(sum(!found), " annotated cell(s) were not found in the full dataset and were skipped",
          call. = FALSE)
}

# Every cell starts as "Not_assessed"; matched cells receive the annotation.
# as.character() guards against a factor source column being written as
# integer codes.
for (annot in c("cell_type", "cell_type_secondary", "tissue_type")) {
  annot.values <- rep("Not_assessed", ncol(sce.all))
  annot.values[hit[found]] <- as.character(colData(sce.good)[[annot]])[found]
  colData(sce.all)[[annot]] <- annot.values
}
# Reorder the cell metadata by column position; positions 6:14 are the columns
# the original note refers to as the continuous ones.
metadata.order <- c(4, 3, 1, 15:17, 6:14)
colData(sce.all) <- colData(sce.all)[, metadata.order]

# Shuffle the cells with a fixed seed. The whole object is subset by column, so
# the cell metadata and the reduced dimensions follow the same permutation and
# need no separate reordering.
set.seed(50014)
jumbled <- sample.int(ncol(sce.all))
sce.all <- sce.all[, jumbled]
# Convert the annotation columns into factors with an explicit level order
colData(sce.all)$Tissue <- factor(colData(sce.all)$Tissue,
                                  levels = c("NSCJ", "BSCJ", "NE", "NG", "BE", "ND", "SMG", "GM"))
colData(sce.all)$Patient <- factor(colData(sce.all)$Patient,
                                   levels = sort(unique(colData(sce.all)$Patient)))
colData(sce.all)$Sample <- factor(colData(sce.all)$Sample,
                                  levels = sort(unique(colData(sce.all)$Sample)))

# The four KRT5/KRT7/MUC5B populations are published as C1-C4. The renaming is
# done by level name rather than by position, so both annotation columns get
# the same mapping (KRT7_cells = C3, MUC5B_cells = C4) regardless of where the
# levels sit in each list.
c.cluster.labels <- c(
  KRT5_cells = "C1",
  KRT5.KRT7_cells = "C2",
  KRT7_cells = "C3",
  MUC5B_cells = "C4"
)

# Primary cell type
cell.levels <- c(
  "Basal",
  "Suprabasal",
  "Intermediate",
  "Superficial",
  "Undifferentiated",
  "Endocrine",
  "Foveolar_Intermediate",
  "Foveolar_differentiated",
  "Parietal",
  "Chief",
  "Columnar_Undifferentiated",
  "Columnar_Intermediate",
  "Columnar_differentiated",
  "Enterocytes_Intermediate",
  "Enterocytes_differentiated",
  "Paneth",
  "Goblet",
  "KRT5_cells",
  "KRT5.KRT7_cells",
  "KRT7_cells",
  "MUC5B_cells",
  "Mucous",
  "Oncocytes",
  "Duct_Intercalating",
  "Myo-epithelial",
  "Unknown.Doublets",
  "Immune",
  "Stromal",
  "Squamous_Esophagus",
  "Not_assessed"
)
colData(sce.all)$cell_type <- factor(colData(sce.all)$cell_type, levels = cell.levels)
is.c.cluster <- cell.levels %in% names(c.cluster.labels)
levels(colData(sce.all)$cell_type)[is.c.cluster] <-
  unname(c.cluster.labels[cell.levels[is.c.cluster]])

# Secondary (finer) cell type. KRT7_cells is listed before MUC5B_cells, as in
# the primary list, so the factor levels come out in C1..C4 order.
cell.levels.2 <- c(
  "Basal",
  "Suprabasal",
  "Suprabasal_Dividing",
  "Intermediate",
  "Superficial",
  "Undifferentiated",
  "Undifferentiated_Dividing",
  "Foveolar_Intermediate",
  "Foveolar_differentiated",
  "Endocrine_NEUROG3",
  "Endocrine_GHRL",
  "Endocrine_CHGA",
  "Endocrine_NEUROD1",
  "Parietal",
  "Chief",
  "Columnar_Undifferentiated",
  "Columnar_Undifferentiated_Dividing",
  "Columnar_Intermediate",
  "Columnar_differentiated",
  "KRT5_cells",
  "KRT5.KRT7_cells",
  "KRT7_cells",
  "MUC5B_cells",
  "Enterocytes_Intermediate",
  "Enterocytes_differentiated",
  "Paneth",
  "Goblet",
  "Mucous_MUC5B_High",
  "Oncocytes_MUC5B_Low",
  "Duct_Intercalating",
  "Myo-epithelial",
  "Immune_T-cells",
  "Immune_B-cells",
  "Immune_Macrophages",
  "Stromal_CALD1_cells",
  "Stromal_GNG11_cells",
  "Stromal_ADH1B_cells",
  "Squamous_Esophagus",
  "Unknown.Doublets",
  "Not_assessed"
)
colData(sce.all)$cell_type_secondary <- factor(colData(sce.all)$cell_type_secondary,
                                               levels = cell.levels.2)
is.c.cluster.2 <- cell.levels.2 %in% names(c.cluster.labels)
levels(colData(sce.all)$cell_type_secondary)[is.c.cluster.2] <-
  unname(c.cluster.labels[cell.levels.2[is.c.cluster.2]])

# Broad tissue compartment
colData(sce.all)$tissue_type <- factor(colData(sce.all)$tissue_type,
                                       levels = c("Squamous", "Columnar", "Glandular", "NonEpithelial", "Not_assessed"))
# Use gene symbols as the feature identifiers.
# Keep only the first row for every symbol so that the symbols are unique ...
first.of.symbol <- !duplicated(rowData(sce.all)$Symbol)
sce.all <- sce.all[first.of.symbol, ]
# ... and use them as row names, which makes genes easy to look up after loading.
rownames(sce.all) <- rowData(sce.all)$Symbol
# Overwrite the counts assay with the logcounts values (cells were already
# shuffled together with the object, so no re-indexing is needed here).
counts(sce.all) <- logcounts(sce.all)
# Sanity check: expression of one marker gene on the tSNE embedding.
# The counts assay holds the logcounts values at this point.
gene <- "KRT5"
gene.df <- data.frame(
  tSNE1 = reducedDims(sce.all)$TSNE[, 1],
  tSNE2 = reducedDims(sce.all)$TSNE[, 2],
  gene = counts(sce.all)[rowData(sce.all)$Symbol == gene, ]
)
# Bare theme: no grid, no panel background, grey axis lines
bare.theme <- theme(
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  axis.line = element_line(colour = "grey")
)
p.gene.expression <- ggplot(gene.df) +
  geom_point(aes(tSNE1, tSNE2, colour = gene), size = 0.5) +
  scale_colour_viridis(option = "A", name = "log2(Expr)") +
  guides(alpha = FALSE) +
  ggtitle(gene) +
  bare.theme
p.gene.expression
# Final tSNE of the shuffled cells, coloured by tissue, on an empty canvas
tissue.df <- data.frame(
  tsne1 = reducedDims(sce.all)$TSNE[, 1],
  tsne2 = reducedDims(sce.all)$TSNE[, 2],
  Tissue = colData(sce.all)$Tissue
)
tsne.corrected <- ggplot(tissue.df) +
  geom_point(aes(tsne1, tsne2, colour = Tissue)) +
  scale_color_manual(values = annot.col) +
  theme_void()
tsne.corrected

# Save the figure as PDF
ggsave(
  filename = "~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/alldata.pdf",
  plot = tsne.corrected,
  width = 8,
  height = 7,
  useDingbats = FALSE
)

# Write the object out in AnnData (h5ad) format
sceasy:::convertFormat(
  sce.all,
  from = "sce",
  to = "anndata",
  outFile = "/home/karolno/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/alldata.h5ad"
)
## Warning in .regularise_df(as.data.frame(SummarizedExperiment::rowData(obj)), :
## Dropping single category variables:Type
## AnnData object with n_obs × n_vars = 50569 × 56852
## obs: 'Tissue', 'Patient', 'Sample', 'cell_type', 'cell_type_secondary', 'tissue_type', 'total_features_by_counts', 'log10_total_features_by_counts', 'total_counts', 'log10_total_counts', 'pct_counts_in_top_50_features', 'pct_counts_in_top_100_features', 'pct_counts_in_top_200_features', 'pct_counts_in_top_500_features', 'Clusters'
## var: 'ID', 'Symbol'
## obsm: 'X_tsne'
# Keep an R-native copy of the re-annotated object next to the h5ad export
saveRDS(object = sce.all,
        file = "~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/alldata.rds")
Fix the annotation of all high quality data.
# Load the object holding the normalized and batch-corrected data
sce.all <- readRDS(file = "~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/old/alldatahighquality.rds")

# Tissue colour palette: a vector named after the tissues present in the data,
# filled in one go with one colour per tissue code. ND and GM get no colour
# in this section.
annot.col <- vector(length = 6)
names(annot.col) <- unique(colData(sce.all)$Tissue)
annot.col[c("NSCJ", "BSCJ", "NE", "NG", "BE", "SMG")] <- c(
  "#F39B7FFF", "#DC0000FF", "darkred",
  "#4DBBD5FF", "#00A087FF", "#B09C85FF"
)

# Quick look at the stored tSNE embedding, coloured by tissue
tsne.df <- data.frame(
  tsne1 = reducedDims(sce.all)$TSNE[, 1],
  tsne2 = reducedDims(sce.all)$TSNE[, 2],
  tissue = colData(sce.all)$Tissue
)
tsne.corrected <- ggplot(tsne.df) +
  geom_point(aes(tsne1, tsne2, colour = tissue)) +
  scale_color_manual(values = annot.col) +
  theme_minimal()
tsne.corrected
# Shorten the sample names to the "Patient..._..._..." part captured by the regex
short.sample <- sub(".+(Patient.+?_.+?_.+?)_.+", "\\1", colData(sce.all)$Sample, perl = TRUE)
colData(sce.all)$Sample <- short.sample

# Reorder the cell metadata by column position; positions 6:13 are the columns
# the original note refers to as the continuous ones.
metadata.order <- c(4, 3, 1, 17:19, 21, 6:13)
colData(sce.all) <- colData(sce.all)[, metadata.order]

# Shuffle the cells with a fixed seed. The whole object is subset by column, so
# the cell metadata and the reduced dimensions follow the same permutation and
# need no separate reordering.
set.seed(50014)
jumbled <- sample.int(ncol(sce.all))
sce.all <- sce.all[, jumbled]
# Convert the annotation columns into factors with an explicit level order
colData(sce.all)$Tissue <- factor(colData(sce.all)$Tissue,
                                  levels = c("NSCJ", "BSCJ", "NE", "NG", "BE", "SMG"))
colData(sce.all)$Patient <- factor(colData(sce.all)$Patient,
                                   levels = sort(unique(colData(sce.all)$Patient)))
colData(sce.all)$Sample <- factor(colData(sce.all)$Sample,
                                  levels = sort(unique(colData(sce.all)$Sample)))

# The four KRT5/KRT7/MUC5B populations are published as C1-C4. The renaming is
# done by level name rather than by position: in the secondary list these
# levels sit at positions 20:23, so reusing the primary list's 18:21 would
# relabel Columnar_Intermediate and Columnar_differentiated instead.
c.cluster.labels <- c(
  KRT5_cells = "C1",
  KRT5.KRT7_cells = "C2",
  KRT7_cells = "C3",
  MUC5B_cells = "C4"
)

# Primary cell type ("Not_assessed" is not a level in this dataset)
cell.levels <- c(
  "Basal",
  "Suprabasal",
  "Intermediate",
  "Superficial",
  "Undifferentiated",
  "Endocrine",
  "Foveolar_Intermediate",
  "Foveolar_differentiated",
  "Parietal",
  "Chief",
  "Columnar_Undifferentiated",
  "Columnar_Intermediate",
  "Columnar_differentiated",
  "Enterocytes_Intermediate",
  "Enterocytes_differentiated",
  "Paneth",
  "Goblet",
  "KRT5_cells",
  "KRT5.KRT7_cells",
  "KRT7_cells",
  "MUC5B_cells",
  "Mucous",
  "Oncocytes",
  "Duct_Intercalating",
  "Myo-epithelial",
  "Unknown.Doublets",
  "Immune",
  "Stromal",
  "Squamous_Esophagus"
)
colData(sce.all)$cell_type <- factor(colData(sce.all)$cell_type, levels = cell.levels)
is.c.cluster <- cell.levels %in% names(c.cluster.labels)
levels(colData(sce.all)$cell_type)[is.c.cluster] <-
  unname(c.cluster.labels[cell.levels[is.c.cluster]])

# Secondary (finer) cell type ("Not_assessed" is not a level in this dataset)
cell.levels.2 <- c(
  "Basal",
  "Suprabasal",
  "Suprabasal_Dividing",
  "Intermediate",
  "Superficial",
  "Undifferentiated",
  "Undifferentiated_Dividing",
  "Foveolar_Intermediate",
  "Foveolar_differentiated",
  "Endocrine_NEUROG3",
  "Endocrine_GHRL",
  "Endocrine_CHGA",
  "Endocrine_NEUROD1",
  "Parietal",
  "Chief",
  "Columnar_Undifferentiated",
  "Columnar_Undifferentiated_Dividing",
  "Columnar_Intermediate",
  "Columnar_differentiated",
  "KRT5_cells",
  "KRT5.KRT7_cells",
  "KRT7_cells",
  "MUC5B_cells",
  "Enterocytes_Intermediate",
  "Enterocytes_differentiated",
  "Paneth",
  "Goblet",
  "Mucous_MUC5B_High",
  "Oncocytes_MUC5B_Low",
  "Duct_Intercalating",
  "Myo-epithelial",
  "Immune_T-cells",
  "Immune_B-cells",
  "Immune_Macrophages",
  "Stromal_CALD1_cells",
  "Stromal_GNG11_cells",
  "Stromal_ADH1B_cells",
  "Squamous_Esophagus",
  "Unknown.Doublets"
)
colData(sce.all)$cell_type_secondary <- factor(colData(sce.all)$cell_type_secondary,
                                               levels = cell.levels.2)
is.c.cluster.2 <- cell.levels.2 %in% names(c.cluster.labels)
levels(colData(sce.all)$cell_type_secondary)[is.c.cluster.2] <-
  unname(c.cluster.labels[cell.levels.2[is.c.cluster.2]])

# Broad tissue compartment
colData(sce.all)$tissue_type <- factor(colData(sce.all)$tissue_type,
                                       levels = c("Squamous", "Columnar", "Glandular", "NonEpithelial"))

# Annotation confidence, with levels in decreasing sort order
colData(sce.all)$confidence <- factor(colData(sce.all)$confidence,
                                      levels = sort(unique(colData(sce.all)$confidence), decreasing = TRUE))
# Use gene symbols as the feature identifiers.
# Keep only the first row for every symbol so that the symbols are unique ...
first.of.symbol <- !duplicated(rowData(sce.all)$Symbol)
sce.all <- sce.all[first.of.symbol, ]
# ... and use them as row names, which makes genes easy to look up after loading.
rownames(sce.all) <- rowData(sce.all)$Symbol
# Overwrite the counts assay with the logcounts values (cells were already
# shuffled together with the object, so no re-indexing is needed here).
counts(sce.all) <- logcounts(sce.all)
# Sanity check: expression of one marker gene on the tSNE embedding.
# The counts assay holds the logcounts values at this point.
gene <- "KRT5"
gene.df <- data.frame(
  tSNE1 = reducedDims(sce.all)$TSNE[, 1],
  tSNE2 = reducedDims(sce.all)$TSNE[, 2],
  gene = counts(sce.all)[rowData(sce.all)$Symbol == gene, ]
)
# Bare theme: no grid, no panel background, grey axis lines
bare.theme <- theme(
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  axis.line = element_line(colour = "grey")
)
p.gene.expression <- ggplot(gene.df) +
  geom_point(aes(tSNE1, tSNE2, colour = gene), size = 0.5) +
  scale_colour_viridis(option = "A", name = "log2(Expr)") +
  guides(alpha = FALSE) +
  ggtitle(gene) +
  bare.theme
p.gene.expression
# Final tSNE of the shuffled cells, coloured by tissue, on an empty canvas
tissue.df <- data.frame(
  tsne1 = reducedDims(sce.all)$TSNE[, 1],
  tsne2 = reducedDims(sce.all)$TSNE[, 2],
  Tissue = colData(sce.all)$Tissue
)
tsne.corrected <- ggplot(tissue.df) +
  geom_point(aes(tsne1, tsne2, colour = Tissue)) +
  scale_color_manual(values = annot.col) +
  theme_void()
tsne.corrected

# Save the figure as PDF
ggsave(
  filename = "~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/alldatahighquality.pdf",
  plot = tsne.corrected,
  width = 8,
  height = 7,
  useDingbats = FALSE
)

# Write the object out in AnnData (h5ad) format
sceasy:::convertFormat(
  sce.all,
  from = "sce",
  to = "anndata",
  outFile = "/home/karolno/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/alldatahighquality.h5ad"
)
## AnnData object with n_obs × n_vars = 39882 × 56852
## obs: 'Tissue', 'Patient', 'Sample', 'cell_type', 'cell_type_secondary', 'tissue_type', 'confidence', 'total_features_by_counts', 'log10_total_features_by_counts', 'total_counts', 'log10_total_counts', 'pct_counts_in_top_50_features', 'pct_counts_in_top_100_features', 'pct_counts_in_top_200_features', 'pct_counts_in_top_500_features'
## var: 'ID', 'Symbol'
## obsm: 'X_tsne'
# Keep an R-native copy of the re-annotated object next to the h5ad export
saveRDS(object = sce.all,
        file = "~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/alldatahighquality.rds")
Fix the annotation of individual tissues
# ---- Settings shared by every tissue (defined once, outside the loop) ----

# Tissue colour palette keyed by tissue code. Built directly as a named
# character vector so that it contains no unnamed placeholder entries.
annot.col <- c(
  NSCJ = "#F39B7FFF",
  BSCJ = "#DC0000FF",
  NE = "darkred",
  NG = "#4DBBD5FF",
  BE = "#00A087FF",
  ND = "#3C5488FF",
  SMG = "#B09C85FF",
  GM = colorRampPalette(c("white", "#00A087FF"))(17)[5]
)

# Display order of the primary cell types. Both the C1-C4 labels and the
# KRT5/KRT7/MUC5B names are listed; only levels present in a tissue are kept.
cell.levels <- c(
  "Basal",
  "Suprabasal",
  "Intermediate",
  "Superficial",
  "Undifferentiated",
  "Endocrine",
  "Foveolar_Intermediate",
  "Foveolar_differentiated",
  "Parietal",
  "Chief",
  "Columnar_Undifferentiated",
  "Columnar_Intermediate",
  "Columnar_differentiated",
  "Enterocytes_Intermediate",
  "Enterocytes_differentiated",
  "Paneth",
  "Goblet",
  "C1",
  "C2",
  "C3",
  "C4",
  "KRT5_cells",
  "KRT5.KRT7_cells",
  "KRT7_cells",
  "MUC5B_cells",
  "Mucous",
  "Oncocytes",
  "Duct_Intercalating",
  "Myo-epithelial",
  "Unknown.Doublets",
  "Immune",
  "Stromal",
  "Squamous_Esophagus"
)

# Display order of the secondary (finer) cell types
cell.levels.2 <- c(
  "Basal",
  "Suprabasal",
  "Suprabasal_Dividing",
  "Intermediate",
  "Superficial",
  "Undifferentiated",
  "Undifferentiated_Dividing",
  "Foveolar_Intermediate",
  "Foveolar_differentiated",
  "Endocrine_NEUROG3",
  "Endocrine_GHRL",
  "Endocrine_CHGA",
  "Endocrine_NEUROD1",
  "Parietal",
  "Chief",
  "Columnar_Undifferentiated",
  "Columnar_Undifferentiated_Dividing",
  "Columnar_Intermediate",
  "Columnar_differentiated",
  "KRT5_cells",
  "KRT5.KRT7_cells",
  "KRT7_cells",
  "MUC5B_cells",
  "Enterocytes_Intermediate",
  "Enterocytes_differentiated",
  "Paneth",
  "Goblet",
  "Mucous_MUC5B_High",
  "Oncocytes_MUC5B_Low",
  "Duct_Intercalating",
  "Myo-epithelial",
  "Immune_T-cells",
  "Immune_B-cells",
  "Immune_Macrophages",
  "Stromal_CALD1_cells",
  "Stromal_GNG11_cells",
  "Stromal_ADH1B_cells",
  "Squamous_Esophagus",
  "Unknown.Doublets"
)

# Published labels of the KRT5/KRT7/MUC5B populations, applied by level name
c.cluster.labels <- c(
  KRT5_cells = "C1",
  KRT5.KRT7_cells = "C2",
  KRT7_cells = "C3",
  MUC5B_cells = "C4"
)

# ---- Re-annotate and export every tissue ----
for (tissue in c("SMG", "NSCJ", "NE", "NG", "ND", "BE")) {
  # Load the object holding the normalized and batch-corrected data
  sce.all <- readRDS(paste0("~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/old/", tissue, ".rds"))

  # Quick look at the stored tSNE embedding, coloured by tissue.
  # Plots are wrapped in print() because nothing auto-prints inside a loop.
  tsne.corrected <- ggplot(data.frame(tsne1 = reducedDims(sce.all)$TSNE[, 1],
                                      tsne2 = reducedDims(sce.all)$TSNE[, 2],
                                      tissue = colData(sce.all)$Tissue)) +
    geom_point(aes(tsne1, tsne2, colour = tissue)) +
    scale_color_manual(values = annot.col) +
    theme_minimal()
  print(tsne.corrected)

  # Shorten the sample names to the "Patient..._..._..." part captured by the regex
  colData(sce.all)$Sample <- sub(".+(Patient.+?_.+?_.+?)_.+", "\\1", colData(sce.all)$Sample, perl = TRUE)

  # Reorder the cell metadata by column position (column 4 is not kept here)
  colData(sce.all) <- colData(sce.all)[, c(3, 1, 17:19, 21, 6:13)]

  # Shuffle the cells with a fixed seed; subsetting the whole object keeps the
  # metadata and the reduced dimensions in step with the expression values.
  set.seed(50014)
  jumbled <- sample.int(ncol(sce.all))
  sce.all <- sce.all[, jumbled]

  # Convert the annotation columns into factors
  colData(sce.all)$Patient <- factor(colData(sce.all)$Patient,
                                     levels = sort(unique(colData(sce.all)$Patient)))
  colData(sce.all)$Sample <- factor(colData(sce.all)$Sample,
                                    levels = sort(unique(colData(sce.all)$Sample)))

  # Primary cell type: keep only the levels that occur in this tissue
  colData(sce.all)$cell_type <- factor(
    colData(sce.all)$cell_type,
    levels = cell.levels[cell.levels %in% unique(colData(sce.all)$cell_type)]
  )
  print(levels(colData(sce.all)$cell_type))

  # Secondary cell type: keep only the levels that occur in this tissue
  colData(sce.all)$cell_type_secondary <- factor(
    colData(sce.all)$cell_type_secondary,
    levels = cell.levels.2[cell.levels.2 %in% unique(colData(sce.all)$cell_type_secondary)]
  )
  print(levels(colData(sce.all)$cell_type_secondary))

  # Rename the KRT5/KRT7/MUC5B populations to C1-C4 by level name. In the
  # printed levels only NSCJ contains them (at positions 11:14), so this does
  # the same as the former position-based rename without depending on which
  # other levels happen to be present.
  secondary.levels <- levels(colData(sce.all)$cell_type_secondary)
  to.rename <- secondary.levels %in% names(c.cluster.labels)
  if (any(to.rename)) {
    levels(colData(sce.all)$cell_type_secondary)[to.rename] <-
      unname(c.cluster.labels[secondary.levels[to.rename]])
  }

  # Broad tissue compartment
  colData(sce.all)$tissue_type <- factor(colData(sce.all)$tissue_type,
                                         levels = c("Squamous", "Columnar", "Glandular", "NonEpithelial"))

  # Annotation confidence, with levels in decreasing sort order
  colData(sce.all)$confidence <- factor(colData(sce.all)$confidence,
                                        levels = sort(unique(colData(sce.all)$confidence), decreasing = TRUE))

  # Use gene symbols as the feature identifiers: keep the first row per symbol
  # and move the symbols into the row names
  sce.all <- sce.all[!duplicated(rowData(sce.all)$Symbol), ]
  rownames(sce.all) <- rowData(sce.all)$Symbol

  # Overwrite the counts assay with the logcounts values
  counts(sce.all) <- logcounts(sce.all)

  # Sanity check: expression of one marker gene on the tSNE embedding
  gene <- "KRT5"
  p.gene.expression <- ggplot(data.frame(tSNE1 = reducedDims(sce.all)$TSNE[, 1],
                                         tSNE2 = reducedDims(sce.all)$TSNE[, 2],
                                         gene = counts(sce.all)[rowData(sce.all)$Symbol == gene, ])) +
    geom_point(aes(tSNE1, tSNE2, colour = gene), size = 0.5) +
    scale_colour_viridis(option = "A", name = "log2(Expr)") +
    # "none" replaces FALSE, which newer ggplot2 releases deprecate
    guides(alpha = "none") +
    ggtitle(gene) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "grey"))
  print(p.gene.expression)

  # Final tSNE coloured by primary cell type (default colours; the tissue
  # palette is not used for this plot)
  tsne.corrected <- ggplot(data.frame(tsne1 = reducedDims(sce.all)$TSNE[, 1],
                                      tsne2 = reducedDims(sce.all)$TSNE[, 2],
                                      Cell_Type = colData(sce.all)$cell_type)) +
    geom_point(aes(tsne1, tsne2, colour = Cell_Type)) +
    theme_void()
  print(tsne.corrected)

  # Save the figure, the AnnData export and an R-native copy of the object
  ggsave(paste0("~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/", tissue, ".pdf"),
         tsne.corrected,
         width = 8, height = 7, useDingbats = FALSE)
  sceasy:::convertFormat(sce.all, from = "sce", to = "anndata",
                         outFile = paste0("/home/karolno/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/", tissue, ".h5ad"))
  saveRDS(sce.all, paste0("~/Dropbox/Postdoc/2019-12-29_BE2020/Website_data/", tissue, ".rds"))
}
## [1] "Mucous" "Oncocytes" "Duct_Intercalating"
## [4] "Myo-epithelial" "Immune" "Stromal"
## [1] "Mucous_MUC5B_High" "Oncocytes_MUC5B_Low" "Duct_Intercalating"
## [4] "Myo-epithelial" "Immune_T-cells" "Immune_B-cells"
## [7] "Immune_Macrophages" "Stromal_CALD1_cells" "Stromal_GNG11_cells"
## [10] "Stromal_ADH1B_cells"
## [1] "Basal" "Suprabasal"
## [3] "Intermediate" "Superficial"
## [5] "Undifferentiated" "Endocrine"
## [7] "Foveolar_Intermediate" "Foveolar_differentiated"
## [9] "C1" "C2"
## [11] "C3" "C4"
## [13] "Immune" "Stromal"
## [1] "Basal" "Suprabasal"
## [3] "Suprabasal_Dividing" "Intermediate"
## [5] "Superficial" "Undifferentiated"
## [7] "Foveolar_Intermediate" "Foveolar_differentiated"
## [9] "Endocrine_GHRL" "Endocrine_CHGA"
## [11] "KRT5_cells" "KRT5.KRT7_cells"
## [13] "KRT7_cells" "MUC5B_cells"
## [15] "Immune_T-cells" "Immune_B-cells"
## [17] "Immune_Macrophages" "Stromal_CALD1_cells"
## [19] "Stromal_GNG11_cells"
## Warning in .regularise_df(as.data.frame(SummarizedExperiment::colData(obj)), :
## Dropping single category variables:confidence
## [1] "Basal" "Suprabasal" "Intermediate" "Superficial" "Immune"
## [1] "Basal" "Suprabasal" "Suprabasal_Dividing"
## [4] "Intermediate" "Superficial" "Immune_T-cells"
## [7] "Immune_Macrophages"
## [1] "Undifferentiated" "Endocrine"
## [3] "Foveolar_Intermediate" "Foveolar_differentiated"
## [5] "Parietal" "Chief"
## [7] "Immune" "Stromal"
## [1] "Undifferentiated" "Undifferentiated_Dividing"
## [3] "Foveolar_Intermediate" "Foveolar_differentiated"
## [5] "Endocrine_GHRL" "Endocrine_CHGA"
## [7] "Endocrine_NEUROD1" "Parietal"
## [9] "Chief" "Immune_T-cells"
## [11] "Immune_B-cells" "Immune_Macrophages"
## [13] "Stromal_CALD1_cells" "Stromal_GNG11_cells"
## Warning in .regularise_df(as.data.frame(SummarizedExperiment::colData(obj)), :
## Dropping single category variables:confidence
## [1] "Undifferentiated" "Endocrine"
## [3] "Enterocytes_Intermediate" "Enterocytes_differentiated"
## [5] "Paneth" "Goblet"
## [7] "Immune" "Stromal"
## [1] "Undifferentiated" "Undifferentiated_Dividing"
## [3] "Endocrine_CHGA" "Enterocytes_Intermediate"
## [5] "Enterocytes_differentiated" "Paneth"
## [7] "Goblet" "Immune_T-cells"
## [9] "Immune_B-cells" "Immune_Macrophages"
## [11] "Stromal_CALD1_cells" "Stromal_GNG11_cells"
## Warning in .regularise_df(as.data.frame(SummarizedExperiment::colData(obj)), :
## Dropping single category variables:confidence
## [1] "Endocrine" "Columnar_Undifferentiated"
## [3] "Columnar_Intermediate" "Columnar_differentiated"
## [5] "Goblet" "Immune"
## [7] "Stromal"
## [1] "Endocrine_NEUROG3" "Columnar_Undifferentiated"
## [3] "Columnar_Undifferentiated_Dividing" "Columnar_Intermediate"
## [5] "Columnar_differentiated" "Goblet"
## [7] "Immune_T-cells" "Immune_B-cells"
## [9] "Immune_Macrophages" "Stromal_CALD1_cells"
## [11] "Stromal_GNG11_cells"
## Warning in .regularise_df(as.data.frame(SummarizedExperiment::colData(obj)), :
## Dropping single category variables:confidence
To finish, print the session info:
# Print the R version, platform and loaded package versions for this run
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora 31 (Workstation Edition)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] destiny_3.0.1 edgeR_3.28.0
## [3] limma_3.42.2 dbscan_1.1-5
## [5] princurve_2.1.4 dynamicTreeCut_1.63-1
## [7] sceasy_0.0.6 reticulate_1.14
## [9] viridis_0.5.1 viridisLite_0.3.0
## [11] pheatmap_1.0.12 Rtsne_0.15
## [13] openxlsx_4.1.4 DropletUtils_1.6.1
## [15] scater_1.14.6 ggplot2_3.2.1
## [17] scran_1.14.6 SingleCellExperiment_1.8.0
## [19] SummarizedExperiment_1.16.1 DelayedArray_0.12.2
## [21] BiocParallel_1.20.1 matrixStats_0.55.0
## [23] Biobase_2.46.0 GenomicRanges_1.38.0
## [25] GenomeInfoDb_1.22.0 IRanges_2.20.2
## [27] S4Vectors_0.24.3 BiocGenerics_0.32.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 RcppEigen_0.3.3.7.0 igraph_1.2.4.2
## [4] lazyeval_0.2.2 sp_1.3-2 RcppHNSW_0.2.0
## [7] digest_0.6.24 htmltools_0.4.0 magrittr_1.5
## [10] R.utils_2.9.2 xts_0.12-0 colorspace_1.4-1
## [13] rappdirs_0.3.1 haven_2.2.0 xfun_0.12
## [16] dplyr_0.8.4 crayon_1.3.4 RCurl_1.98-1.1
## [19] jsonlite_1.6.1 hexbin_1.28.1 zoo_1.8-7
## [22] glue_1.3.1 gtable_0.3.0 zlibbioc_1.32.0
## [25] XVector_0.26.0 car_3.0-6 BiocSingular_1.2.1
## [28] Rhdf5lib_1.8.0 DEoptimR_1.0-8 HDF5Array_1.14.2
## [31] abind_1.4-5 VIM_5.1.0 scales_1.1.0
## [34] ggplot.multistats_1.0.0 ggthemes_4.2.0 Rcpp_1.0.3
## [37] laeken_0.5.1 dqrng_0.2.1 foreign_0.8-72
## [40] rsvd_1.0.2 proxy_0.4-23 vcd_1.4-5
## [43] RColorBrewer_1.1-2 ellipsis_0.3.0 pkgconfig_2.0.3
## [46] R.methodsS3_1.8.0 farver_2.0.3 nnet_7.3-12
## [49] locfit_1.5-9.1 tidyselect_1.0.0 labeling_0.3
## [52] rlang_0.4.7 munsell_0.5.0 cellranger_1.1.0
## [55] tools_3.6.2 ranger_0.12.1 evaluate_0.14
## [58] stringr_1.4.0 yaml_2.2.1 knitr_1.28
## [61] zip_2.0.4 robustbase_0.93-5 purrr_0.3.3
## [64] formatR_1.7 R.oo_1.23.0 compiler_3.6.2
## [67] beeswarm_0.2.3 curl_4.3 e1071_1.7-3
## [70] smoother_1.1 tibble_3.0.3 statmod_1.4.33
## [73] stringi_1.4.5 RSpectra_0.16-0 forcats_0.4.0
## [76] lattice_0.20-38 Matrix_1.2-18 vctrs_0.3.4
## [79] pillar_1.4.3 lifecycle_0.2.0 lmtest_0.9-37
## [82] BiocNeighbors_1.4.1 data.table_1.12.8 bitops_1.0-6
## [85] irlba_2.3.3 R6_2.4.1 pcaMethods_1.78.0
## [88] gridExtra_2.3 rio_0.5.16 vipor_0.4.5
## [91] codetools_0.2-16 boot_1.3-23 MASS_7.3-51.4
## [94] assertthat_0.2.1 rhdf5_2.30.1 withr_2.1.2
## [97] GenomeInfoDbData_1.2.2 hms_0.5.3 grid_3.6.2
## [100] tidyr_1.0.2 class_7.3-15 rmarkdown_2.1
## [103] DelayedMatrixStats_1.8.0 carData_3.0-3 TTR_0.23-6
## [106] scatterplot3d_0.3-41 ggbeeswarm_0.6.0